{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('/Users/siyuyang/Source/repos/GitHub_MSFT/CameraTraps')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from collections import Counter, defaultdict\n",
    "from random import sample\n",
    "import math\n",
    "\n",
    "from tqdm import tqdm\n",
    "from unidecode import unidecode \n",
    "\n",
    "from data_management.megadb.schema import sequences_schema_check\n",
    "from data_management.annotations.add_bounding_boxes_to_megadb import *\n",
    "from data_management.megadb.converters.cct_to_megadb import make_cct_embedded, process_sequences, write_json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Uploading the eMammal sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_kays/emammal_kays_20190409.json') as f:\n",
    "    kays_cct = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_kays/emammal_kays_bboxes_20190409.json') as f:\n",
    "    kays_boxes_cct = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_long/emammal_long_20190409.json') as f:\n",
    "    long_cct = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_long/emammal_long_bboxes_20190409.json') as f:\n",
    "    long_boxes_cct = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_mcshea/emammal_mcshea_20190409.json') as f:\n",
    "    mcshea_cct = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_mcshea/emammal_mcshea_bboxes_20190409.json') as f:\n",
    "    mcshea_boxes_cct = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 3,\n",
       "  'seq_id': 'd19135s194',\n",
       "  'location': 'NCSU_GW09',\n",
       "  'frame_num': 2,\n",
       "  'width': 2592,\n",
       "  'file_name': 'p101d19135/d19135s194i2.JPG',\n",
       "  'id': 'datasetemammalkays.projectp101.deploymentd19135.seqd19135s194.imgd19135s194i2',\n",
       "  'height': 1944,\n",
       "  'datetime': '2016-03-05 09:47:51'}]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample(kays_cct['images'], 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 3,\n",
       "  'seq_id': 'd34787s464',\n",
       "  'location': '2786-5',\n",
       "  'frame_num': 2,\n",
       "  'width': 2048,\n",
       "  'file_name': 'p158d34787/d34787s464i2.JPG',\n",
       "  'id': 'datasetemammallong.projectp158.deploymentd34787.seqd34787s464.imgd34787s464i2',\n",
       "  'height': 1536,\n",
       "  'datetime': '2017-01-29 09:02:23'}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample(long_cct['images'], 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 40,\n",
       "  'seq_id': 'd21464s23',\n",
       "  'location': 'VWL Smith Property Ru 1_16',\n",
       "  'frame_num': 24,\n",
       "  'width': 2048,\n",
       "  'file_name': '3191d21464/d21464s23i24.JPG',\n",
       "  'id': 'datasetemammalmcshea.project3191.deploymentd21464.seqd21464s23.imgd21464s23i24',\n",
       "  'height': 1536,\n",
       "  'datetime': '2016-09-21 17:39:01'}]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample(mcshea_cct['images'], 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "kays_folders = set([i['file_name'].split('/')[0] for i in kays_cct['images']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "long_folders = set([i['file_name'].split('/')[0] for i in long_cct['images']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "mcshea_folders = set([i['file_name'].split('/')[0] for i in mcshea_cct['images']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2130"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(kays_folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "126"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(long_folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1010"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(mcshea_folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "set()"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "set()"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kays_folders.intersection(long_folders)\n",
    "kays_folders.intersection(mcshea_folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "set()"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mcshea_folders.intersection(long_folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "emammal_folders = {\n",
    "    'wpz_emammal_2018': list(long_folders),\n",
    "    'mcshea_emammal': list(mcshea_folders),\n",
    "    'kays_emammal': list(kays_folders)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/databases_201904/emammal_folders.json', 'w') as f:\n",
    "    json.dump(emammal_folders, f, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## wpz_emammal_2018\n",
    "\n",
    "This is the Robert Long set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading image DB...\n",
      "Number of items from the image DB: 98486\n",
      "Number of images with more than 1 species: 17010 (17.27% of image DB)\n",
      "Loading bbox DB...\n",
      "Number of images added from bbox DB entries:  0\n",
      "Number of images amended:  0\n",
      "Number of items in total:  98486\n",
      "Number of images with more than one bounding box: 547 (0.5554088906037407% of all entries)\n"
     ]
    }
   ],
   "source": [
    "wpz_embedded = make_cct_embedded(image_db=long_cct, bbox_db=long_boxes_cct)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mcshea_emammal\n",
    "\n",
    "Problem with `seq_id` - this is only unique within a deployment. Fixed below.\n",
    "\n",
    "`'id': 'datasetemammalmcshea.projectp195.deploymentL-HY11-070.seq1.imgL-HY11-070-003'`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading image DB...\n",
      "Number of items from the image DB: 504433\n",
      "Number of images with more than 1 species: 4794 (0.95% of image DB)\n",
      "Loading bbox DB...\n",
      "Number of images added from bbox DB entries:  0\n",
      "Number of images amended:  0\n",
      "Number of items in total:  504433\n",
      "Number of images with more than one bounding box: 343 (0.06799713737998901% of all entries)\n"
     ]
    }
   ],
   "source": [
    "mcshea_embedded = make_cct_embedded(image_db=mcshea_cct, bbox_db=mcshea_boxes_cct)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 15,\n",
       "  'seq_id': 'd31825s27',\n",
       "  'location': 'VWL Shelby Ru 7_17',\n",
       "  'frame_num': 2,\n",
       "  'width': 2048,\n",
       "  'file_name': '3191d31825/d31825s27i2.JPG',\n",
       "  'id': 'datasetemammalmcshea.project3191.deploymentd31825.seqd31825s27.imgd31825s27i2',\n",
       "  'height': 1536,\n",
       "  'datetime': '2017-08-19 17:05:51',\n",
       "  'annotations': {'species': ['domestic cow']}}]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample(mcshea_embedded, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "for e in mcshea_embedded:\n",
    "    parts = e['id'].split('.')\n",
    "    correct_seq_id = '.'.join(parts[1:4])\n",
    "    e['seq_id'] = correct_seq_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 60,\n",
       "  'seq_id': 'project3191.deploymentd19850.seqd19850s60',\n",
       "  'location': 'VWL Stonebridge Ru 1_16',\n",
       "  'frame_num': 27,\n",
       "  'width': 2048,\n",
       "  'file_name': '3191d19850/d19850s60i27.JPG',\n",
       "  'id': 'datasetemammalmcshea.project3191.deploymentd19850.seqd19850s60.imgd19850s60i27',\n",
       "  'height': 1536,\n",
       "  'datetime': '2016-06-29 11:54:10',\n",
       "  'annotations': {'species': ['empty']}}]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample(mcshea_embedded, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sequences with frame_num that are not unique\n",
    "\n",
    "problem_sequences = []\n",
    "\n",
    "for seq in sequences:\n",
    "    if 'images' not in seq:\n",
    "        continue\n",
    "        \n",
    "    # if there are more than one image item, each needs a frame_num\n",
    "    if len(seq['images']) > 1:\n",
    "        frame_num_set = []\n",
    "        for i in seq['images']:\n",
    "            if 'frame_num' not in i:\n",
    "                assert False, 'sequence {} has more than one image but not all images have frame_num'.format(seq['seq_id'])\n",
    "\n",
    "            frame_num_set.append(i['frame_num'])\n",
    "            \n",
    "        if len(set(frame_num_set)) != len(seq['images']):\n",
    "            problem_sequences.append(seq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(problem_sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# seems like the deployment info in the id field is wrong (according to file name) - no bbox - drop this.\n",
    "mcshea_sequences = []\n",
    "\n",
    "for seq in sequences:\n",
    "    if seq['seq_id'] != 'project3191.deploymentd17359.seqd17359s3':\n",
    "        mcshea_sequences.append(seq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "for seq in mcshea_sequences:\n",
    "    if 'class' in seq:\n",
    "        class_norm = []\n",
    "        for c in seq['class']:\n",
    "            class_norm.append(unidecode(c))\n",
    "        seq['class'] = class_norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Verified that the sequence items meet requirements not captured by the schema.\n",
      "Verified that the sequence items conform to the schema.\n"
     ]
    }
   ],
   "source": [
    "sequences_schema_check.sequences_schema_check(mcshea_sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_json(out_path, mcshea_sequences)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## kays_emammal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading image DB...\n",
      "Number of items from the image DB: 1155991\n",
      "Number of images with more than 1 species: 36329 (3.14% of image DB)\n",
      "Loading bbox DB...\n",
      "Number of images added from bbox DB entries:  0\n",
      "Number of images amended:  9477\n",
      "Number of items in total:  1155991\n",
      "Number of images with more than one bounding box: 1529 (0.13226746575016587% of all entries)\n"
     ]
    }
   ],
   "source": [
    "kays_embedded = make_cct_embedded(image_db=kays_cct, bbox_db=kays_boxes_cct)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1155991"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 45,\n",
       "  'seq_id': 'd26309s34',\n",
       "  'location': 'Harrison_D_Greene4',\n",
       "  'frame_num': 19,\n",
       "  'width': 1920,\n",
       "  'file_name': 'p168d26309/d26309s34i19.JPG',\n",
       "  'id': 'datasetemammalkays.projectp168.deploymentd26309.seqd26309s34.imgd26309s34i19',\n",
       "  'height': 1080,\n",
       "  'datetime': '2017-06-12 03:45:44',\n",
       "  'annotations': {'species': ['white-tailed deer']}}]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(kays_embedded)\n",
    "sample(kays_embedded, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "for e in kays_embedded:\n",
    "    parts = e['id'].split('.')\n",
    "    correct_seq_id = '.'.join(parts[1:4])\n",
    "    e['seq_id'] = correct_seq_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'seq_num_frames': 3,\n",
       "  'seq_id': 'projectp168.deploymentd21818.seqd21818s201',\n",
       "  'location': 'Caveny_J_Avery1',\n",
       "  'frame_num': 1,\n",
       "  'width': 1920,\n",
       "  'file_name': 'p168d21818/d21818s201i1.JPG',\n",
       "  'id': 'datasetemammalkays.projectp168.deploymentd21818.seqd21818s201.imgd21818s201i1',\n",
       "  'height': 1440,\n",
       "  'datetime': '2016-12-21 07:48:43',\n",
       "  'annotations': {'species': ['white-tailed deer']}}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample(kays_embedded, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# some frame_num were not unique\n",
    "\n",
    "problem_sequences = []\n",
    "\n",
    "for seq in sequences:\n",
    "    if 'images' not in seq:\n",
    "        continue\n",
    "        \n",
    "    # if there are more than one image item, each needs a frame_num\n",
    "    if len(seq['images']) > 1:\n",
    "        frame_num_set = []\n",
    "        for i in seq['images']:\n",
    "            if 'frame_num' not in i:\n",
    "                assert False, 'sequence {} has more than one image but not all images have frame_num'.format(seq['seq_id'])\n",
    "\n",
    "            frame_num_set.append(i['frame_num'])\n",
    "            \n",
    "        if len(set(frame_num_set)) != len(seq['images']):\n",
    "            problem_sequences.append(seq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# only one had issues, no bbox, dropping it\n",
    "\n",
    "kays_sequences = []\n",
    "for e in sequences:\n",
    "    if e['seq_id'] != 'projectp168.deploymentd22148.seqd22148s113':\n",
    "        kays_sequences.append(e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Verified that the sequence items meet requirements not captured by the schema.\n",
      "Verified that the sequence items conform to the schema.\n"
     ]
    }
   ],
   "source": [
    "sequences_schema_check.sequences_schema_check(kays_sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_json(out_path, kays_sequences)"
   ]
  },
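  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Make MegaDB\n",
    "\n",
    "Note: `sequences` and `out_path` are only defined in this section, yet the `mcshea_emammal` and `kays_emammal` sections above use them. Run these cells (with `embedded` and `dataset_name` set for the dataset being processed) before the cells above that reference `sequences` or `out_path`."
   ]
  },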
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "embedded = kays_embedded\n",
    "dataset_name = 'kays_emammal'\n",
    "out_path = '/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/megadb_batches_9_10_11/{}.json'.format(dataset_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The dataset_name is set to kays_emammal. Please make sure this is correct!\n",
      "Making a deep copy of docs...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  6%|▋         | 72887/1155991 [00:00<00:02, 364382.35it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Putting 1155991 images into sequences...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1155991/1155991 [00:03<00:00, 380572.31it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of sequences: 106755\n",
      "Checking the location field...\n",
      "Checking which fields in a CCT image entry are sequence-level...\n",
      "\n",
      "all_img_properties\n",
      "{'id', 'datetime', 'class', 'bbox', 'frame_num', 'location', 'file'}\n",
      "\n",
      "img_level_properties\n",
      "{'id', 'datetime', 'bbox', 'frame_num', 'file'}\n",
      "\n",
      "image-level properties that really should be sequence-level\n",
      "{'class', 'location'}\n",
      "\n",
      "Finished processing sequences.\n",
      "Example sequence items:\n",
      "\n",
      "{'seq_id': 'projectp168.deploymentd22883.seqd22883s1', 'dataset': 'kays_emammal', 'images': [{'frame_num': 1, 'id': 'datasetemammalkays.projectp168.deploymentd22883.seqd22883s1.imgd22883s1i1', 'datetime': '2017-03-26 08:09:37', 'file': 'p168d22883/d22883s1i1.JPG'}, {'frame_num': 2, 'id': 'datasetemammalkays.projectp168.deploymentd22883.seqd22883s1.imgd22883s1i2', 'datetime': '2017-03-26 08:09:38', 'file': 'p168d22883/d22883s1i2.JPG'}, {'frame_num': 3, 'id': 'datasetemammalkays.projectp168.deploymentd22883.seqd22883s1.imgd22883s1i3', 'datetime': '2017-03-26 08:09:39', 'file': 'p168d22883/d22883s1i3.JPG'}, {'frame_num': 4, 'id': 'datasetemammalkays.projectp168.deploymentd22883.seqd22883s1.imgd22883s1i4', 'datetime': '2017-03-26 08:09:40', 'file': 'p168d22883/d22883s1i4.JPG'}, {'frame_num': 5, 'id': 'datasetemammalkays.projectp168.deploymentd22883.seqd22883s1.imgd22883s1i5', 'datetime': '2017-03-26 08:09:40', 'file': 'p168d22883/d22883s1i5.JPG'}], 'class': ['domestic dog'], 'location': 'Myles_B_Union2'}\n",
      "\n",
      "[{'seq_id': 'projectp166.deploymentd19921.seqd19921s104', 'dataset': 'kays_emammal', 'images': [{'frame_num': 1, 'id': 'datasetemammalkays.projectp166.deploymentd19921.seqd19921s104.imgd19921s104i1', 'datetime': '2016-06-09 17:22:15', 'file': 'p166d19921/d19921s104i1.JPG'}, {'frame_num': 2, 'id': 'datasetemammalkays.projectp166.deploymentd19921.seqd19921s104.imgd19921s104i2', 'datetime': '2016-06-09 17:22:17', 'file': 'p166d19921/d19921s104i2.JPG'}, {'frame_num': 3, 'id': 'datasetemammalkays.projectp166.deploymentd19921.seqd19921s104.imgd19921s104i3', 'datetime': '2016-06-09 17:22:18', 'file': 'p166d19921/d19921s104i3.JPG'}, {'frame_num': 4, 'id': 'datasetemammalkays.projectp166.deploymentd19921.seqd19921s104.imgd19921s104i4', 'datetime': '2016-06-09 17:22:18', 'file': 'p166d19921/d19921s104i4.JPG'}, {'frame_num': 5, 'id': 'datasetemammalkays.projectp166.deploymentd19921.seqd19921s104.imgd19921s104i5', 'datetime': '2016-06-09 17:22:19', 'file': 'p166d19921/d19921s104i5.JPG'}], 'class': ['eastern gray squirrel'], 'location': 'K_Mann_Garden'}]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sequences = process_sequences(embedded, dataset_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sequences_schema_check.sequences_schema_check(sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_json(out_path, sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:cameratraps] *",
   "language": "python",
   "name": "conda-env-cameratraps-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
